import scrapy


class NewsSpider(scrapy.Spider):
    """Spider that collects headline titles from one section of news.sina.com.cn.

    The crawl starts at the Sina news front page, follows a single link
    taken from the ``cNavLinks`` navigation bar, and yields one
    ``{"title": ...}`` dict per headline found on the page it lands on.
    """

    name = "news"
    start_urls = ["https://news.sina.com.cn/"]
    # NOTE: for debugging a single article, a URL such as
    # https://news.sina.com.cn/w/2024-11-27/doc-incxnfms6892943.shtml
    # can be used as the start URL instead.

    # Zero-based position, within the navigation bar, of the section link
    # to follow from the front page.
    SECTION_LINK_INDEX = 5

    def parse(self, response):
        """Follow the configured navigation-bar link from the front page.

        Args:
            response: The downloaded front page.

        Yields:
            A single ``scrapy.Request`` for the selected section page, handled
            by :meth:`parse_one`. Nothing is yielded when the navigation bar
            has too few links.
        """
        nav_links = response.xpath('//div[@class="cNavLinks"]/a/@href').getall()
        if len(nav_links) <= self.SECTION_LINK_INDEX:
            # The page layout changed or the response was not the expected
            # page; report it instead of crashing with an IndexError.
            self.logger.warning(
                "Expected more than %d navigation links on %s, found %d",
                self.SECTION_LINK_INDEX,
                response.url,
                len(nav_links),
            )
            return

        # hrefs may be relative or protocol-relative ("//host/path");
        # scrapy.Request needs an absolute URL, so resolve against the page.
        section_url = response.urljoin(nav_links[self.SECTION_LINK_INDEX])
        yield scrapy.Request(url=section_url, callback=self.parse_one)

    def parse_one(self, response):
        """Extract headline titles from a section page.

        Args:
            response: The downloaded section page.

        Yields:
            ``{"title": str}`` for every ``<h2>`` headline whose link has
            text. Headings without link text are skipped.
        """
        headlines = response.xpath('//div[@id]/div/h2')
        self.logger.info(
            "Found %d headline nodes on %s", len(headlines), response.url
        )
        for headline in headlines:
            title = headline.xpath("./a/text()").get()
            if title is None:
                # No <a> text under this heading; an item without a title
                # carries no information, so do not emit it.
                continue
            yield {"title": title}
        